from lxml import etree

'''
Practice: processing HTML with XPath
'''

# Sample document that the XPath query below runs against: an unordered list
# of absolute links, an ordered list of relative links, and two <div> elements.
html = '''
<html>
    <body>
        <ul>
            <li><a href="http://www.baidu.com">百度</a></li>
            <li><a href="http://www.google.com" id="google">谷歌</a></li>
            <li><a href="http://www.sogou.com">搜狗</a></li>
        </ul>
        <ol>
            <li><a href="plane">飞机</a></li>
            <li><a href="gun">大炮</a></li>
            <li><a href="train">火车</a></li>
        </ol>
        <div class="job">李嘉诚</div>
        <div class="common">胡辣汤</div>
    </body>
</html>
'''

# Parse the markup string into an element tree.
et = etree.HTML(html)

# Absolute path down to the text of the <a> inside the second <li> of the
# <ul>; XPath positional predicates are 1-based, so li[2] is the second item.
matches = et.xpath('/html/body/ul/li[2]/a/text()')

# The query yields a list of matching text nodes; keep the first one.
result = matches[0]
print(result)